Status Quo
library(tidyverse)
births_path <- "https://raw.githubusercontent.com/EvaMaeRey/tableau/9e91c2b5ee803bfef10d35646cf4ce6675b92b55/tidytuesday_data/2018-10-02-us_births_2000-2014.csv"
library(ggcalendar)
readr::read_csv(births_path) %>%
mutate(month = str_pad(month, 2, pad = "0"),
date_of_month = str_pad(date_of_month, 2, pad = "0")) %>%
mutate(date = paste(year, month, date_of_month, sep = "-") %>% as_date()) %>%
mutate(ind_holiday =
(month == "12" & date_of_month %in% 24:31) |
(month == "07" & date_of_month == "04") |
(month == "01" & date_of_month == "01") |
(month == "10" & date_of_month == "31") |
(month == "11" & date_of_month %in% 20:30)
) |>
mutate(date_in_2020 = paste(2020, month, date_of_month, sep = "-") %>% as_date()) |>
mutate(ind_weekend = wday(date) == 1 | wday(date) == 7) |>
mutate(ind_Feb_29th = month(date) == 2 & day(date) == 29) |>
mutate(ind_13th = day(date) == 13) |>
mutate(ind_Fri13th = wday(date) == 6 & day(date) == 13) ->
births_df
## Rows: 5479 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (5): year, month, date_of_month, day_of_week, births
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
births_df %>%
filter(year == 2012) %>%
ggcalendar() +
aes(date = date) +
geom_point_calendar() +
aes(size = births) +
aes(color = births) +
geom_text_calendar(aes(label = day(date)), color = "oldlace", size = 2) +
guides(
colour = guide_legend("Births"),
size = guide_legend("Births")
) +
geom_point_calendar(data = data.frame(date =
as_date("2012-12-25")),
size = 5, color = "red", shape = 21) +
scale_color_viridis_c() +
labs(title = "The year in 2000 in births")

ggchalkboard:::geoms_chalk_on()
ggplot(births_df) +
aes(x = births) +
geom_histogram() +
ggxmean::geom_x_mean() +
ggxmean::geom_x_mean_label() +
geom_rug(alpha = .2) +
labs(x = "Number of births") +
ggchalkboard::theme_chalkboard() +
labs(title = "Distribution of Number of Births in the U.S. each day from 2000-2014" %>% str_wrap(45))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Using the `size` aesthetic with geom_segment was deprecated in ggplot2 3.4.0.
## ℹ Please use the `linewidth` aesthetic instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

last_plot() +
facet_wrap(~ind2cat::ind_recode(ind_weekend), ncol = 1) +
labs(title = "We explore the bimodal distribution looking at 'weekend effects'" %>% str_wrap(45))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

last_plot() %+%
facet_wrap(~ind2cat::ind_recode(ind_holiday), ncol = 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

(last_plot() +
facet_wrap(~ind2cat::ind_recode(ind_weekend), ncol = 1) +
labs(title = "We explore the bimodal distribution looking at 'weekend effects'" %>% str_wrap(45))) %+%
(births_df |> filter(!ind_holiday))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

last_plot() +
facet_wrap(~ind2cat::ind_recode(ind_13th, rev = T), ncol = 1) +
labs(title = "We also look supersticion around the number 13 might impact number of births" %>% str_wrap(45))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

last_plot() +
facet_grid(wday(date, label = T) ~
ind2cat::ind_recode(ind_13th, rev = T)) +
labs(title = "For fun, we break the data up by day of the week and 13th" %>% str_wrap(45))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

last_plot() +
facet_grid(wday(date, label = T) ~
ind2cat::ind_recode(ind_Feb_29th)) +
labs(title = "Finally, we turn to Feb 29th - how does birth rate compare to other days?" %>% str_wrap(45))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

last_plot() %+%
(births_df |>
filter(date_in_2020 >= as.Date("2020-02-26")) |>
filter(date_in_2020 <= as.Date("2020-03-03"))
) +
geom_rug() +
labs(title = "We narrow our comparisons to February 26 to March 3 in each year" %>% str_wrap(45)) +
labs(subtitle = "Between Feb 27 and March 3rd by day of week and Feb 19 indicator " %>% str_wrap(55))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

births_df |>
filter(!ind_holiday) |>
filter(!ind_13th) |>
ggplot() +
aes(x = date_in_2020, y = births) +
geom_line() +
aes(color = year) +
facet_wrap(~wday(date, label = T)) +
# geom_vline(xintercept = as.Date("2020-02-29")) +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?

lm(births ~ year + month, births_df)
##
## Call:
## lm(formula = births ~ year + month, data = births_df)
##
## Coefficients:
## (Intercept) year month02 month03 month04 month05
## 76258.36 -32.56 236.40 215.86 115.10 264.21
## month06 month07 month08 month09 month10 month11
## 565.34 812.54 1005.55 1090.30 495.22 221.76
## month12
## 262.03
# ggchalkboard:::geoms_chalk_off()
births_df |>
filter(date_in_2020 >= as.Date("2020-02-24")) |>
filter(date_in_2020 <= as.Date("2020-03-05")) |>
ggplot() +
aes(x = date_in_2020, y = births) +
geom_line(color = "black", alpha= .2) +
geom_point(aes(shape = ind_weekend,
color = ind_Feb_29th,
size = ind_Feb_29th)) +
geom_text(aes(label = wday(date, label = T)),
vjust = -0.2) +
# geom_text(aes(label = births),
# vjust = 1.2) +
facet_wrap(~year)
## Warning: Using size for a discrete variable is not advised.

last_plot() +
aes(linetype = ind_weekend)
## Warning: Using size for a discrete variable is not advised.
